library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(brolgar)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(gapminder)
# Read solver.csv from the working directory into a tibble.
data <- readr::read_csv(file = "solver.csv")
## Rows: 1614 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): file
## dbl (3): alpha, accuracy, time
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop the absolute directory prefix from every `file` value.
# The replacement is an explicit empty string: the previous "\\1" was a
# backreference to a capture group the pattern never defined and only worked
# because an undefined group expands to "". The prefix is matched literally
# (fixed = TRUE) because it is a plain path, not a regular expression.
data$file <- gsub(
  "/home/sgould/Desktop/projects/Crosswords-BERT/xd/",
  "",
  data$file,
  fixed = TRUE
)
# Scatter plot of `accuracy` against `alpha`.
# geom_jitter() adds a little random noise to each point so that
# overlapping observations remain distinguishable.
data %>%
  ggplot(aes(x = alpha, y = accuracy)) +
  geom_jitter(size = 1.2, alpha = 0.8) +
  labs(title = "Accuracy and alpha values") +
  theme_minimal()

# Scatter plot of `time` against `alpha`.
# The title now names the variables actually plotted (x = alpha, y = time);
# it previously read "Accuracy and time values", copied from the plot above.
ggplot(data = data, aes(x = alpha, y = time)) +
  geom_point(
    size = 1.2,
    alpha = .8,
    position = "jitter" # random noise so overlapping points stay visible
  ) +
  theme_minimal() +
  labs(title = "Time and alpha values")

# Keep only the first 256 rows so the per-file lines stay readable.
# NOTE: despite its name, `data_100` holds 256 rows, not 100.
data_100 <- data %>% head(256)

# One accuracy-vs-alpha trajectory per file, drawn as a line with markers.
data_100 %>%
  ggplot(aes(x = alpha, y = accuracy, colour = factor(file))) +
  geom_line() +
  geom_point() +
  theme_bw()

# Bubble chart of `alpha` against `time`: point size encodes accuracy and
# colour encodes the file each observation belongs to.
p <- ggplot(
  data,
  aes(x = time, y = alpha, size = accuracy, colour = file)
) +
  geom_point() +
  theme_bw()

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
# Linear mixed model: accuracy as a linear function of alpha (fixed effect)
# with a random intercept for each file.
lmm <- lmer(
  accuracy ~ alpha + (1 | file),
  data = data
)

# Print the model summary.
summary(lmm)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: accuracy ~ alpha + (1 | file)
##    Data: data
## 
## REML criterion at convergence: -7854.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9514 -0.5051 -0.0171  0.4577  6.3294 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev.
##  file     (Intercept) 0.0125899 0.11220 
##  Residual             0.0003071 0.01753 
## Number of obs: 1614, groups:  file, 92
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  8.956e-01  1.175e-02  9.218e+01   76.24   <2e-16 ***
## alpha       -3.178e-02  1.653e-03  1.521e+03  -19.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr)
## alpha -0.082
# Run anova() on the fitted mixed model and print the resulting table.
anova(lmm)
# Print the data for a final visual inspection.
data